libname home "/scratch/baruch/";

option compress=yes;

%let timer = %sysfunc(datetime());
options nonotes;

%macro NASDAQ_after2015(year1, year2);

proc datasets nolist lib=work; delete _out; quit;

%do date = &year1 %to &year2;
%if %sysfunc(exist(taqmsec.ctm_&date.)) %then %do;

proc datasets nolist lib=work; 
 delete _f1 _f11 _f11_unique _f12 _f1_unique 
        _f2 _f3 _s1 _s2 _s3 _t1 _t11 _t11_unique _t12 _t1_unique _t2 _t3 _oneday _onedayy; 
run; quit;

********************************************************************************************;
********************************************************************************************;
***** 00:00 to 9:45 ************************************************************************;

data _f1; 
 set taqmsec.ctm_&date.; 
 keep DATE TIME_M EX SYM_ROOT SYM_SUFFIX SIZE symbol TR_SCOND;
 where Tr_Corr='00' and size>0 and price>1 and
       /* trades during 00:00 to 9:45, submitted from NASDAQ */
       time_m >= hms(0,00,0) and time_m <= hms(9,45,0) and ex in ("T" "Q");
 symbol=catx(' ',sym_root, sym_suffix);
run;

/* In TAQ, TR_SCOND = "O" or "Q" are indicators for opening auction,
           TR_SCOND = "M" or "6" are indicators for closing auction */
/* Sometimes, TAQ reports duplicates trades for closing and opening, 
           that is, using "O" to report once, and then immediately uses "Q" to report the same trade again */
/* So we need to eliminate these potential duplicates */

/* opening auction trades, eliminate potential duplicates */
data _f11; set _f1; where TR_SCOND contains 'O' or TR_SCOND contains 'Q'; run;
proc sort data= _f11 out= _f11_unique nodupkey; by date symbol ex size; run;

/* regular trades */
data _f12; set _f1; where TR_SCOND not contains 'O' and TR_SCOND not contains 'Q'; run;

/* combine non-duplicate opening auction trades with non-auction trades */
data _f1_unique; set _f11_unique _f12; run;

/* sum the trades during 00:00 to 9:45 */
proc sql;
 create table _f2 as 
 select distinct date, symbol, ex, sum(size) as volume
 from _f1_unique
 group by date, symbol, ex
 order by date, symbol, ex 
;
quit;

/* rearrange results */
data _f3; retain DATE time symbol ex volume; set _f2; time = hms(9,45,0); format time TIME20.9; run;


********************************************************************************************;
********************************************************************************************;
***** 9:45 to 15:45 ************************************************************************;

data _s1; 
 set taqmsec.ctm_&date.; 
 keep DATE TIME_M EX SYM_ROOT SYM_SUFFIX SIZE symbol TR_SCOND;
 where Tr_Corr='00' and size>0 and price>1 and
       time_m >= hms(9,45,0) and time_m <= hms(15,45,0) and ex in ("T" "Q");
 symbol=catx(' ',sym_root, sym_suffix);
run;

%let t = %sysfunc(hms(9,45,0)); %let interval = 15*60; 
data _s2; set _s1; format time TIME20.9; 
 %do %until (&t. >= %sysfunc(hms(15,45,0)));
  %let start = %sysevalf(&t.); %let end = %sysevalf(&t. + &interval.);   
  if time_m >= &start. and time_m <= &end. then do; time = &end.; end;
  %let t = %sysevalf(&t. + &interval.);
 %end;
run;

proc sql;
 create table _s3 as
 select distinct date, time, symbol, ex, sum(size) as volume
 from _s2
 group by date, symbol, ex, time
 order by date, symbol, ex, time
;
quit;

********************************************************************************************;
********************************************************************************************;
***** 15:45 to 16:01 ***********************************************************************;

data _t1; 
 set taqmsec.ctm_&date.; 
 keep DATE TIME_M EX SYM_ROOT SYM_SUFFIX SIZE symbol TR_SCOND;
 where Tr_Corr='00' and size>0 and price>1 and
       /* trades during 15:45 to 16:01, submitted from NASDAQ */
       /* take extra 1 minutes to include all the trades submitted from 16:00 to 16:01 */
       time_m >= hms(15,45,0) and time_m <= hms(16,01,0) and ex in ("T" "Q");
 symbol=catx(' ',sym_root, sym_suffix);
run;

/* In TAQ, TR_SCOND = "O" or "Q" are indicators for opening auction,
           TR_SCOND = "M" or "6" are indicators for closing auction */
/* Sometimes, TAQ reports duplicates trades for closing and opening, 
           that is, using "M" to report once, and then immediately uses "6" to report the same trade again */
/* So we need to eliminate these potential duplicates */

/* closing auction trades */
data _t11; set _t1; where TR_SCOND contains 'M' or TR_SCOND contains '6'; run;
proc sort data= _t11 out= _t11_unique nodupkey; by date symbol ex size; run;

/* regular trades */
data _t12; set _t1; where TR_SCOND not contains 'M' and TR_SCOND not contains '6'; run;

/* combine non-duplicate closing auction trades with non-auction trades */
data _t1_unique; set _t12 _t11_unique; run;

proc sql;
 create table _t2 as 
 select distinct date, symbol, ex, sum(size) as volume
 from _t1_unique
 group by date, symbol, ex 
 order by date, symbol, ex
;
quit;

data _t3; retain DATE time symbol EX volume; set _t2; time = hms(16,01,0); format time TIME20.9; run;


********************************************************************************************;
********************************************************************************************;
***** 00:00 to 16:01: one day **************************************************************;

data _oneday; set _f3 _s3 _t3; run;
proc sql;
	create table _onedayy as
	select a.*, b.cusip
	from _oneday a left join taqmsec.mastm_&date. as b
	on a.date=b.date and a.symbol=b.symbol_15 
	order by date, symbol, ex, time;
quit;


********************************************************************************************;
********************************************************************************************;
***** 00:00 to 16:01: append all days*******************************************************;
proc datasets nolist lib=work; append base=_out data=_onedayy force; run; quit;
%end; %end;

%mend;


%NASDAQ_after2015(year1=20150101, year2=20211231);

/* save data to home dictionary */
data home.NASDAQ15to21; set _out; run;

options notes;
data _null_;
dur = datetime() - &timer;
put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
run; 

